
/* 

Paper: Gentrification and pioneer businesses 
Authors: Behrens, Boualem, Martin, Mayneris 

Name dofile: identify_pioneers_negbin.do  
Version: 11 nov. 2021 

Output: dataset pioneers_final_negbin.dta with the baseline list of pioneer sectors  

Inputs:

 - pioneer_gentri90_only`n'_negbin_`v'_1990.dta with n in {250, 500, cont} and v in {pc, sh_edu, gentri} [produced in compute_elasticity_negbin.do] [publicly available]
 - naics02_NETSlabel.dta  [publicly available]

*/ 


* Build the pioneer dummy for the "pc" and "sh_edu" estimations.
* For each specification n (250, 500, cont) and each measure v (pc, sh_edu) we
* load the estimation results, flag coefficients that are significant at the
* 1% level, and save one file pioneers`n'_negbin_`v'.dta containing the dummy
* pioneer_`n'_`v'_high.
foreach n in "250" "500" "cont" {
	foreach v in "pc" "sh_edu" {
		use pioneer_gentri90_only`n'_negbin_`v'_1990.dta, clear

		// names of the coefficient / standard-error pairs used below
		local b_gr  coef_`v'_gr_poor_init`n'
		local se_gr se_`v'_gr_poor_init`n'
		local b_lvl  coef_poor_init`n'
		local se_lvl se_poor_init`n'

		// we generate dummies for significance of coefficients
		// (2.576 = two-sided 1% critical value of the standard normal).
		// Stata treats missing as larger than any number, so a missing t-ratio
		// (missing coefficient, missing s.e. or s.e. equal to zero) would pass
		// the ">" test: we therefore require the ratio itself to be non-missing.
		g siginit`n' = abs(`b_gr'/`se_gr') > 2.576 & !missing(`b_gr'/`se_gr')
		g siginit_pcinc`n' = abs(`b_lvl'/`se_lvl') > 2.576 & !missing(`b_lvl'/`se_lvl')

		// we create dummies for pioneers: coef_poor_init significantly negative
		// and coef_`v'_gr_poor_init significantly positive
		gen pioneer_`n'_`v'_high = (`b_lvl' < 0 & siginit_pcinc`n' == 1 & `b_gr' > 0 & siginit`n' == 1)

		// suffix with the measure so the variable stays distinct across the saved files
		rename nb_block_active nb_block_active`v'
		save pioneers`n'_negbin_`v'.dta, replace
	}
}
	
* Build the pioneer dummy for the "gentri" estimations.
* For each specification n (250, 500, cont) we load the estimation results,
* flag coefficients that are significant at the 1% level, and save
* pioneers`n'_negbin_gentri.dta containing the dummy pioneer_`n'_gentri_high.
local v gentri
foreach n in "250" "500" "cont" {
	use pioneer_gentri90_only`n'_negbin_`v'_1990.dta, clear

	// names of the coefficient / standard-error pairs used below
	local b_g   coef_`v'_init`n'
	local se_g  se_`v'_init`n'
	local b_lvl  coef_poor_init`n'
	local se_lvl se_poor_init`n'

	// we generate dummies for significance of coefficients
	// (2.576 = two-sided 1% critical value of the standard normal).
	// Stata treats missing as larger than any number, so a missing t-ratio
	// (missing coefficient, missing s.e. or s.e. equal to zero) would pass
	// the ">" test: we therefore require the ratio itself to be non-missing.
	g siginit`n' = abs(`b_g'/`se_g') > 2.576 & !missing(`b_g'/`se_g')
	g siginit_pcinc`n' = abs(`b_lvl'/`se_lvl') > 2.576 & !missing(`b_lvl'/`se_lvl')

	// we create dummies for pioneers: coef_poor_init significantly negative
	// and coef_gentri_init significantly positive
	gen pioneer_`n'_`v'_high = (`b_lvl' < 0 & siginit_pcinc`n' == 1 & `b_g' > 0 & siginit`n' == 1)
	save pioneers`n'_negbin_`v'.dta, replace
}

	// We gather the nine pioneer dummies (3 specifications x 3 measures) in a
	// single sector-level dataset keyed on naics. The gentri file for the 250
	// specification is the master; the other eight files are merged in turn.
	use pioneers250_negbin_gentri.dta, clear
	local to_merge 500_negbin_gentri cont_negbin_gentri ///
		250_negbin_pc 250_negbin_sh_edu ///
		500_negbin_pc 500_negbin_sh_edu ///
		cont_negbin_pc cont_negbin_sh_edu
	foreach stub of local to_merge {
		merge 1:1 naics using pioneers`stub'.dta
		drop _merge
	}

	// score = number of estimations (out of 9) in which the sector is flagged
	// as pioneer. A missing dummy propagates, so score is missing whenever a
	// sector lacks one of the nine flags.
	gen score = 0
	foreach n in "250" "500" "cont" {
		foreach v in "pc" "sh_edu" "gentri" {
			replace score = score + pioneer_`n'_`v'_high
		}
	}

	// we define as pioneers the sectors flagged in more than 5 of the 9
	// estimations (i.e. at least two thirds of them); a missing score gives 0
	gen pioneer_high_negbin = (!missing(score) & score > 5)
	
	// We attach the sector labels stored in naics02_NETSlabel.dta (key: naics02)
	// and write the final dataset.
	generate naics02 = naics
	sort naics02
	// keep(master match) discards labels of sectors absent from our list
	merge 1:1 naics02 using naics02_NETSlabel.dta, keep(master match) nogenerate

	// only the naics identifiers/labels, the score and the pioneer dummy are kept
	keep naics* pioneer_high_negbin score
	label variable score "Score used to build the pioneer dummy"
	label variable pioneer_high_negbin "Dummy is 1 if the sector is pioneers (baseline)"
	save pioneers_final_negbin.dta, replace
	
	
	